podatki <- read.table("/cloud/project/Poglavje 5/Naloga 2/Priljubljenost.csv", header=TRUE, sep=";", dec=",")

head(podatki)
##   Razred Osnovnošolec Priljubljenost Ekstravertiranost Spol
## 1      1            1            6.3                 5    1
## 2      1            2            4.9                 7    0
## 3      1            3            5.3                 4    1
## 4      1            4            4.7                 3    1
## 5      1            5            6.0                 5    1
## 6      1            6            4.7                 4    0

Opis spremenljivk:

podatki$SpolFaktor <- factor(podatki$Spol, 
                             levels = c(0, 1), 
                             labels = c("fant", "dekle"))
fit_OLS <- lm(Priljubljenost ~ Ekstravertiranost + SpolFaktor, 
              data = podatki)

library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:DescTools':
## 
##     Recode
vif(fit_OLS)
## Ekstravertiranost        SpolFaktor 
##           1.00803           1.00803
podatki$StdOst <- round(rstandard(fit_OLS), 3)
podatki$CooksD <- round(cooks.distance(fit_OLS), 3)

hist(podatki$StdOst,
     main = "Histogram standardiziranih ostankov",
     ylab = "Frekvenca",
     xlab = "Standardizirani ostanki")

podatki$StdOcene <- scale(fit_OLS$fitted.values)

library(ggplot2)
ggplot(podatki, aes(y=StdOst, x=StdOcene)) +
  theme_linedraw() +
  geom_point() +
  xlab("Standardizirane ocenjene vrednosti") +
  ylab("Standardizirani ostanki")

head(podatki[order(podatki$StdOst), c("Razred", "Osnovnošolec", "StdOst")])
##      Razred Osnovnošolec StdOst
## 1622     82            9 -3.951
## 627      31           18 -3.038
## 1632     82           19 -3.015
## 949      48           10 -2.934
## 1619     82            6 -2.929
## 1989    100            9 -2.929
head(podatki[order(-podatki$StdOst), c("Razred", "Osnovnošolec", "StdOst")])
##      Razred Osnovnošolec StdOst
## 1666     84           14  2.831
## 1266     64            4  2.826
## 1273     64           12  2.826
## 1268     64            6  2.815
## 364      18           14  2.732
## 588      29           13  2.726
head(podatki[order(-podatki$CooksD), c("Razred", "Osnovnošolec", "CooksD")])
##      Razred Osnovnošolec CooksD
## 1274     64           13  0.010
## 659      33            7  0.008
## 627      31           18  0.007
## 1268     64            6  0.007
## 588      29           13  0.006
## 656      33            4  0.006
podatkiC <- podatki[c(-1622, -627, -1632, -1274),  ]

fit_OLS <- lm(Priljubljenost ~ Ekstravertiranost + SpolFaktor, 
              data = podatkiC)

summary(fit_OLS)
## 
## Call:
## lm(formula = Priljubljenost ~ Ekstravertiranost + SpolFaktor, 
##     data = podatkiC)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1635 -0.6694 -0.0487  0.7365  3.0454 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        2.79289    0.10288   27.15   <2e-16 ***
## Ekstravertiranost  0.29412    0.01903   15.45   <2e-16 ***
## SpolFaktordekle    1.49113    0.04797   31.09   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.067 on 1993 degrees of freedom
## Multiple R-squared:  0.3952, Adjusted R-squared:  0.3946 
## F-statistic: 651.2 on 2 and 1993 DF,  p-value: < 2.2e-16
fit_ML <- glm(Priljubljenost ~ Ekstravertiranost + SpolFaktor, 
              data = podatkiC)

summary(fit_ML)
## 
## Call:
## glm(formula = Priljubljenost ~ Ekstravertiranost + SpolFaktor, 
##     data = podatkiC)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.1635  -0.6694  -0.0487   0.7365   3.0454  
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        2.79289    0.10288   27.15   <2e-16 ***
## Ekstravertiranost  0.29412    0.01903   15.45   <2e-16 ***
## SpolFaktordekle    1.49113    0.04797   31.09   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 1.138731)
## 
##     Null deviance: 3752.5  on 1995  degrees of freedom
## Residual deviance: 2269.5  on 1993  degrees of freedom
## AIC: 5928.7
## 
## Number of Fisher Scoring iterations: 2
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
## --------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## --------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename,
##     summarise, summarize
## The following object is masked from 'package:ggpubr':
## 
##     mutate
Statistika <- summarySE(podatkiC, 
                        measurevar="Priljubljenost", 
                        groupvars=c("Razred"), 
                        conf.interval=0.95)
head(Statistika)
##   Razred  N Priljubljenost        sd        se        ci
## 1      1 20       5.075000 0.9535943 0.2132302 0.4462959
## 2      2 20       4.105000 0.9991970 0.2234273 0.4676386
## 3      3 18       4.694444 1.1649147 0.2745730 0.5792984
## 4      4 23       5.434783 0.8138621 0.1697020 0.3519404
## 5      5 21       5.285714 0.9509394 0.2075120 0.4328624
## 6      6 20       4.335000 0.9675117 0.2163422 0.4528094
library(ggplot2)
ggplot(Statistika, aes(x=Razred, y=Priljubljenost)) +
  geom_bar(position=position_dodge(), stat="identity") +
  geom_errorbar(aes(ymin=Priljubljenost-ci, ymax=Priljubljenost+ci),
                width=0.1,                    
                position=position_dodge(.9)) +
  theme_linedraw() 

library(lme4)
library(lmerTest)
hierar_fit0 <- lmer(Priljubljenost ~ (1 | Razred), 
                    REML = FALSE,  
                    data = podatkiC)

summary(hierar_fit0)
## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: Priljubljenost ~ (1 | Razred)
##    Data: podatkiC
## 
##      AIC      BIC   logLik deviance df.resid 
##   6302.5   6319.3  -3148.2   6296.5     1993 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2185 -0.6993  0.0004  0.6718  3.3314 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Razred   (Intercept) 0.680    0.8246  
##  Residual             1.211    1.1005  
## Number of obs: 1996, groups:  Razred, 100
## 
## Fixed effects:
##             Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)   5.0819     0.0861 99.7584   59.02   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
hierar_fit1 <- lmer(Priljubljenost ~ Ekstravertiranost + SpolFaktor + (1 | Razred), 
                    REML = FALSE,  
                    data = podatkiC)

summary(hierar_fit1)
## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: 
## Priljubljenost ~ Ekstravertiranost + SpolFaktor + (1 | Razred)
##    Data: podatkiC
## 
##      AIC      BIC   logLik deviance df.resid 
##   4907.4   4935.4  -2448.7   4897.4     1991 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.05053 -0.66397 -0.00815  0.67481  2.99750 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Razred   (Intercept) 0.6081   0.7798  
##  Residual             0.5837   0.7640  
## Number of obs: 1996, groups:  Razred, 100
## 
## Fixed effects:
##                     Estimate Std. Error         df t value Pr(>|t|)
## (Intercept)          2.14898    0.11621  401.82925   18.49   <2e-16
## Ekstravertiranost    0.44080    0.01608 1955.58073   27.41   <2e-16
## SpolFaktordekle      1.25043    0.03716 1924.95455   33.65   <2e-16
##                      
## (Intercept)       ***
## Ekstravertiranost ***
## SpolFaktordekle   ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Ekstrv
## Ekstrvrtrns -0.708       
## SpolFktrdkl -0.101 -0.085
anova(hierar_fit0, hierar_fit1, test="Chi")
## Data: podatkiC
## Models:
## hierar_fit0: Priljubljenost ~ (1 | Razred)
## hierar_fit1: Priljubljenost ~ Ekstravertiranost + SpolFaktor + (1 | Razred)
##             npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## hierar_fit0    3 6302.5 6319.3 -3148.2   6296.5                     
## hierar_fit1    5 4907.4 4935.4 -2448.7   4897.4 1399.1  2  < 2.2e-16
##                
## hierar_fit0    
## hierar_fit1 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
hierar_fit2 <- lmer(Priljubljenost ~ Ekstravertiranost + SpolFaktor + Ekstravertiranost:SpolFaktor + (1 | Razred), 
                 REML = FALSE,  
                 data = podatkiC)


summary(hierar_fit2)
## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: 
## Priljubljenost ~ Ekstravertiranost + SpolFaktor + Ekstravertiranost:SpolFaktor +  
##     (1 | Razred)
##    Data: podatkiC
## 
##      AIC      BIC   logLik deviance df.resid 
##   4908.7   4942.3  -2448.3   4896.7     1990 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.0348 -0.6725 -0.0016  0.6817  3.0342 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Razred   (Intercept) 0.6086   0.7801  
##  Residual             0.5834   0.7638  
## Number of obs: 1996, groups:  Razred, 100
## 
## Fixed effects:
##                                     Estimate Std. Error         df
## (Intercept)                          2.21684    0.14075  755.83468
## Ekstravertiranost                    0.42739    0.02248 1926.23227
## SpolFaktordekle                      1.11999    0.15697 1920.96280
## Ekstravertiranost:SpolFaktordekle    0.02529    0.02957 1923.95786
##                                   t value Pr(>|t|)    
## (Intercept)                        15.750  < 2e-16 ***
## Ekstravertiranost                  19.016  < 2e-16 ***
## SpolFaktordekle                     7.135 1.37e-12 ***
## Ekstravertiranost:SpolFaktordekle   0.855    0.393    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Ekstrv SplFkt
## Ekstrvrtrns -0.812              
## SpolFktrdkl -0.568  0.664       
## Ekstrvrt:SF  0.564 -0.699 -0.972
hierar_fit3 <- lmer(Priljubljenost ~ Ekstravertiranost + SpolFaktor + (1 + Ekstravertiranost | Razred), 
                 REML = FALSE,  
                 data = podatkiC)

summary(hierar_fit3)
## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: 
## Priljubljenost ~ Ekstravertiranost + SpolFaktor + (1 + Ekstravertiranost |  
##     Razred)
##    Data: podatkiC
## 
##      AIC      BIC   logLik deviance df.resid 
##   4842.8   4882.0  -2414.4   4828.8     1989 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.05257 -0.65655 -0.00629  0.67289  3.09288 
## 
## Random effects:
##  Groups   Name              Variance Std.Dev. Corr 
##  Razred   (Intercept)       2.42166  1.5562        
##           Ekstravertiranost 0.02672  0.1635   -0.94
##  Residual                   0.54917  0.7411        
## Number of obs: 1996, groups:  Razred, 100
## 
## Fixed effects:
##                     Estimate Std. Error         df t value Pr(>|t|)
## (Intercept)          2.09146    0.17742   96.66425   11.79   <2e-16
## Ekstravertiranost    0.44264    0.02273   93.08654   19.47   <2e-16
## SpolFaktordekle      1.24310    0.03632 1909.17058   34.23   <2e-16
##                      
## (Intercept)       ***
## Ekstravertiranost ***
## SpolFaktordekle   ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Ekstrv
## Ekstrvrtrns -0.911       
## SpolFktrdkl -0.062 -0.062
anova(hierar_fit1, hierar_fit3, test="Chi")
## Data: podatkiC
## Models:
## hierar_fit1: Priljubljenost ~ Ekstravertiranost + SpolFaktor + (1 | Razred)
## hierar_fit3: Priljubljenost ~ Ekstravertiranost + SpolFaktor + (1 + Ekstravertiranost | Razred)
##             npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## hierar_fit1    5 4907.4 4935.4 -2448.7   4897.4                     
## hierar_fit3    7 4842.8 4882.0 -2414.4   4828.8 68.606  2  1.266e-15
##                
## hierar_fit1    
## hierar_fit3 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
podatkiC$Ocena_hierar_fit3 <- predict(hierar_fit3)

podatkiC[5, ]
##   Razred Osnovnošolec Priljubljenost Ekstravertiranost Spol
## 5      1            5              6                 5    1
##   SpolFaktor StdOst CooksD StdOcene Ocena_hierar_fit3
## 5      dekle  0.225      0 0.785374          5.751353